;;   This file is part of scheme-GNUnet, a partial Scheme port of GNUnet.
;;   Copyright (C) 2003, 2004, 2005, 2006, 2008, 2009, 2010, 2020 GNUnet e.V.
;;
;;   GNUnet is free software: you can redistribute it and/or modify it
;;   under the terms of the GNU Affero General Public License as published
;;   by the Free Software Foundation, either version 3 of the License,
;;   or (at your option) any later version.
;;
;;   GNUnet is distributed in the hope that it will be useful, but
;;   WITHOUT ANY WARRANTY; without even the implied warranty of
;;   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
;;   Affero General Public License for more details.
;;
;;   You should have received a copy of the GNU Affero General Public License
;;   along with this program.  If not, see <http://www.gnu.org/licenses/>.
;;
;;   SPDX-License-Identifier: AGPL-3.0-or-later
;;
;;   As a special exception to the GNU Affero General Public License,
;;   the file may be relicensed under any license used for
;;   most source code of GNUnet 0.13.1, or later versions, as published by
;;   GNUnet e.V.

;; Upstream author: Christian Grothoff
;; Upstream source: gnunet-0.13.1/util/container_meta_data.c
;; Scheme port author: Maxime Devos
;; Scheme module: (gnu gnunet metadata)
;; Brief: Storing of meta data

;; Deviations from upstream:
;;  * file names in meta-data are not automatically POSIXified.

;; TODO: (de-)serialisation, dependencies, other procedures
(library (gnu gnunet metadata)
  (export meta-item? meta-item-mime-type meta-item-data meta-item-format
	  make-meta-item meta-item=?
	  meta-data? make-meta-data meta-data-extend meta-data=?)
  (import (rnrs arithmetic bitwise)
	  (rnrs base)
	  (rnrs control)
	  (rnrs records syntactic)
	  (rnrs bytevectors)
	  (rnrs lists)
	  (only (gnu extractor metaformats)
		METAFORMAT_UTF8
		METAFORMAT_C_STRING
		METAFORMAT_BINARY
		meta-format?
		integer->meta-format)
	  (only (gnu extractor metatypes)
		integer->meta-type
		meta-type?)
	  (gnu gnunet utils netstruct)
	  (gnu gnunet utils bv-slice)
	  (gnu gnunet metadata struct)
	  (only (gnu gnunet utils decompress) decompress)
	  (only (gnu gnunet utils hat-let) let^)
	  (only (srfi srfi-31) rec)
	  (only (srfi srfi-43) vector-unfold)
	  (only (srfi srfi-45) delay force))

  ;; An arbitrary implementation limit on buffer sizes.
  (define GNUNET_MAX_MALLOC_CHECKED (* 40 1024 1024))

  (define HEADER_VERSION_MASK #x7FFFFFFF)

  ;; Meta data item
  (define-record-type (<meta-item> %make-meta-item meta-item?)
    ;; Name of extracting plugin (an ASCII string, or #f)
    (fields (immutable plugin-name meta-item-plugin)
	    ;; Mime-type of data (an ASCII string, or #f).
	    (immutable mime-type meta-item-mime-type)
	    ;; The actual meta data (bytevector).
	    (immutable data %meta-item-data)
	    ;; Type of the meta data (<meta-type>).
	    (immutable type meta-item-type)
	    ;; Format of the meta data.
	    (immutable format meta-item-format))
    (opaque #t)
    (sealed #t))

  (define (meta-item-data-size item)
    "How large is the @lisp{meta-item-data} of the <meta-item>
@var{item}? Expressed in bytes."
    (bytevector-length (%meta-item-data item)))

  (define (meta-item=? x y)
    "Are two <meta-item> equal?"
    (assert (meta-item? x))
    (assert (meta-item? y))
    (equal? x y))

  (define (make-meta-item plugin-name mime-type data type format)
    "Construct a meta data item"
    ;; TODO: make strings read-only when running on Guile Scheme.
    ;; (RNRS scheme doesn't have a string-set! procedure,
    ;; so portable sandboxes can still use this module safely)
    (assert (or mime-type (string? mime-type)))

    (%make-meta-item plugin-name mime-type data type format))

  ;; Meta data to associate with a file, directory or namespace.
  (define-record-type (<meta-data> %make-meta-data meta-data?)
    ;; Vector of the meta data items.
    ;; (TODO: perhaps a functional deque would be faster)
    (fields (immutable items meta-data-items)
	    ;; Complete serialized and compressed buffer of the items,
	    ;; as a promised bytevector.
	    (immutable sbuf meta-data-sbuf-promise))
    (opaque #t)
    (sealed #t))

  (define (%vector->meta-data item-vec)
    "Create a fresh <meta-data> with some items (no type-checking)"
    (rec meta-data
	 (%make-meta-data item-vec
			  (delay (make-sbuf meta-data)))))

  (define (make-meta-data)
    "Create a fresh <meta-data>"
    (%vector->meta-data (vector)))

  ;; TODO: perhaps this may be useful?
  #; (define (forget-sbuf meta-data)
    "The serialization buffer is no longer relevant, regenerate it
lazyily.

@var{meta-data}: meta data to forget serialization buffer of"
  frob)

  ;; GNUNET_CONTAINER_meta_data_test_equal isn't ported.
  ;; It doesn't compare the mime types, so it doesn't check
  ;; for equality in the sense of @lisp{equal?}.
  (define (meta-data=? x y)
    "Test if two MDs are equal.  We consider them equal if
the meta types, formats, content and mime type match.
(Warning: the C port doesn't check the mime type)"
    "Compare two meta data items for equality.

Warning: two equal MD are not necessarily @lisp{equal?} (TODO: yet)."
    (assert (meta-data? x))
    (assert (meta-data? y))
    ;; ignore meta-data-sbuf-promise
    (or (eq? x y)
	(and (equal? (meta-data-items x)
		     (meta-data-items y)))))

  (define (meta-data-extend meta plugin-name type format data-mime-type data)
    "Extend metadata.  Note that the list of meta data items is
sorted by size (largest first).

Return the updated meta-data, and #f if this entry already exists, #t
otherwise.  If the entry already exists (identified by @var{type}
and @var{data}), don't change the old entry, except for defining
the mime type if it wasn't set previously, and making the meta
format more specific.

Deviation from upstream: upstream changes directory separators to
POSIX style ('/') for some meta data, this port doesn't.

Entries are identified by @var{type} and @var{data}.

@var{meta} metadata to extend
@var{plugin-name} plugin_name name of the plugin that produced this value;
special values can be used (i.e. '&lt;zlib&gt;' for zlib being
used in the main libextractor library and yielding
meta data) name of extracting plugin
@var{type} libextractor-type describing the meta data
@var{format} basic format information about data
@var{data-mime-type} mime-ype of data (not of original file);
  can be @lisp{#f} (if mime-type is not known) (immutable)
@var{data} actual meta-data found (bytevector)"
    (assert (meta-data? meta))
    (assert (string? plugin-name)) ;; TODO perhaps check for \0 bytes
    (assert (meta-type? type))
    (assert (meta-format? format))
    (assert (or (not data-mime-type) (string? data-mime-type)))
    (assert (bytevector? data))
    ;; Figure out where to insert or set the meta data.
    ;; TODO: binary search instead of linear search
    (let* ((items (meta-data-items meta))
	   (items-length (vector-length items)))
      (let loop ((i 0))
	(cond ((or (>= i items-length)
		   (< (meta-item-data-size (vector-ref items i))))
	       ;; A new entry: insert at the end of the item vector,
	       ;; or earlier. TODO: read-only bytevectors & strings
	       (let* ((meta-item (%make-meta-item plugin-name
						  data-mime-type
						  (bytevector-copy data)
						  type
						  format))
		      (new-items (vector-insert items i meta-item)))
		 (values (%vector->meta-data new-items)
			 #t)))
	      ((and (equal? (meta-item-type (vector-ref items i))
			    type)
		    (bytevector=? (%meta-item-data (vector-ref items i))
				  data))
	       ;; If format and mime-type aren't changed,
	       ;; just keep the old structure (freshness is not required).
	       (let* ((old-item (vector-ref items i))
		      (new-mime-type (or (meta-item-mime-type old-item)
					 data-mime-type))
		      (old-format (meta-item-format old-item))
		      (new-format
		       (if (and (equal? old-format METAFORMAT_C_STRING)
				(equal? format METAFORMAT_UTF8))
			   METAFORMAT_UTF8
			   old-format))
		      (new-item (%make-meta-item new-mime-type
						 (%meta-item-data old-item)
						 meta-item-data
						 meta-item-format)))
		 (if (equal? old-item new-item)
		     (values meta #f)
		     (%vector->meta-data (vector-replace items i
							 new-item)))))
	      (else (loop (+ 1 i)))))))

  (define (vector-insert vec i x)
    "Insert @var{x} into the vector @var{vec} at offset @var{i}"
    (vector-unfold (lambda (j)
		     (cond ((< j i) (vector-ref vec j))
			   ((= j i) x)
			   ((> j i) (vector-ref vec (- j 1)))))
		   (+ 1 (vector-length vec))))

  (define (vector-replace vec i x)
    "Replace the element at offset @var{i} in @var{vec} by @var{x}"
    (vector-unfold (lambda (j)
		     (cond ((= j i) x)
			   (else (vector-ref vec j))))
		   (vector-length vec)))

  (define (bv-slice bv offset length)
    "Copy @var{length} bytes from @var{bv}, starting at @var{offset}."
    (let ((bv-new (make-bytevector length)))
      (bytevector-copy! bv offset bv-new 0 length)
      bv-new))

  (define (meta-data-deserialize slice)
    "Deserialize meta-data, as a <meta-data>.

The serialized meta-data is passed as a readable slice @var{slice}.
In case of success, return an appropriate @code{<meta-data>}.
In case of a parsing error, return @code{#f}.
(Unsupported versions count as parsing errors.)

TODO: perhaps a variant raising conditions may be more informative."
    ;; Argument checks
    (let^ ((!! (slice? slice))
	   (!! (slice-readable? slice))
	   ;; Header checks
	   (? (< (size-length slice) (sizeof MetaDataHeader ())) #f)
	   (! header (slice-slice slice 0 (sizeof MetaDataHeader ())))
	   (! version (bitwise-and (read% MetaDataHeader ("version") header)
				   HEADER_VERSION_MASK))
	   (? (not (= 2 version)) #f) ; unsupported version
	   (! ic (read% MetaDataHeader ("entries") header))
	   (! data-size (read% MetaDataHeader ("size") header))
	   (? (or (> (* ic (sizeof MetaDataEntry ())) data-size)
		  (and (not (= 0 ic))
		       ;; TODO: isn't this clause redundant?
		       (< data-size
			  (* ic (sizeof MetaDataEntry ())))))
	      #f)
	   ;; Decompression
	   (! compressed?
	      (not (= 0 (bitwise-and
			 (read% MetaDataHeader ("version") header)))))
	   (! cdata
	      (let ((maybe-compressed
		     (slice-slice slice (sizeof MetaDataHeader ()))))
		(cond ((not compressed?)
		       maybe-compressed)
		      ((>= data-size GNUNET_MAX_MALLOC_CHECKED)
		       ;; make sure we don't blow our memory limit because
		       ;; of a mal-formed message... 40 MiB seems rather
		       ;; large to encounter in the wild, so this
		       ;; is unlikely to be a problem.
		       #f)
		      (else
		       (decompress maybe-compressed data-size)))))
	   ;; Check decompression was successful
	   (? (not cdata) #f)
	   (! mdata (slice-slice cdata (* ic (sizeof MetaDataEntry ()))))
	   ;; Loop over metadata
	   (/o/ loop-metadata
		(i 0)
		(md (make-meta-data))
		(left (- data-size (* ic (sizeof MetaDataEntry ())))))
	   (? (>= i ic) md) ;; all metadata is deserialised
	   (! from-entry-till-end
	      (slice-slice cdata (* ic (sizeof MetaDataEntry ()))))
	   (! entry-header
	      (slice-slice from-entry-till-end
			   0 (sizeof MetaDataEntry)))
	   (! format (read% MetaDataEntry ("format") entry-header))
	   ;; Bail out if the metaformat is unrecognised
	   ;; FIXME why did I write 0 here?
	   (? (not (member 0 `(,METAFORMAT_UTF8 ,METAFORMAT_C_STRING
						,METAFORMAT_BINARY)))
	      ;; TODO: upstream returns incomplete @var{md}
	      ;; in this case! Return NULL instead!
	      ;; (An incomplete @var{md} is returned in
	      ;; some other cases as well.)
	      #f)
	   (! entry-data-length
	      (read% MetaDataEntry ("data-size") entry-header))
	   (! plugin-name-length
	      (read% MetaDataEntry ("plugin-name-length") entry-header))
	   (! mime-type-length
	      (read% MetaDataEntry ("mime-type-length") entry-header))
	   (? (> entry-data-length left) #f)
	   (! left (- left entry-data-length))
	   (! meta-data-offset
	      (+ mdata-offset left))
	   ;; Strings are terminated with a \0
	   ;; TODO: upstream doesn't check the location of
	   ;; the **first** \0. Is this intentional or irrelevant?
	   (? (and (member format
			   `(,METAFORMAT_UTF8 ,METAFORMAT_C_STRING))
		   (or (= 0 entry-data-length)
		       (not (= (bytevector-u8-ref
				cdata-bv
				(+ meta-data-offset
				   (- entry-data-length 1)))))))
	      #f)
	   (? (> plugin-name-length left) #f)
	   (! left (- left plugin-name-length))
	   (? (and (> plugin-name-length 0)
		   (not (= 0 (bytevector-u8-ref
			      cdata-bv
			      (+ mdata-offset
				 left
				 plugin-name-length
				 -1)))))
	      #f)
	   ;; FIXME plen or entry-data-length
	   ;; Does not include terminating \0.
	   (! plugin-bv
	      (and (> plugin-name-length 0)
		   (bv-slice cdata-bv (+ mdata-offset left)
			     (- plugin-name-length 1))))
	   ;; There isn't any formal requirement for
	   ;; being encoded as UTF-8 as far as I know,
	   ;; but in practice this will probably be ASCII,
	   ;; which is a subset of UTF-8.
	   (! plugin-string
	      (and plugin-bv (utf8->string plugin-bv)))
	   (? (> mime-type-length left) #f)
	   (! left (- left mime-type-length))
	   (? (and (> mime-type-length 0)
		   (< 0 (bytevector-u8-ref cdata-bv
					   (+ mdata-offset
					      mime-type-length
					      -1))))
	      #f)
	   (! mime-type-string
	      (and (< 0 mime-type-length)
		   (utf8->string (bv-slice cdata-bv
					   (+ mdata-offset
					      left -1)
					   (- mime-type-length 1)))))
	   (! new-md
	      (meta-data-extend
	       md plugin-string
	       (read% MetaDataEntry ("type") entry)
	       format
	       mime-type-string
	       (bv-slice cdata-bv meta-data-offset
			 entry-data-length))))
	  (loop-metadata (+ i 1)
			 new-md
			 left)))

  (define (break)
    "This state seems rather suspicious, but not necessarily incorrect."
    #f)

  (define-syntax ==>
    (syntax-rules ()
      ((_ P Q)
       (if P
	   Q
	   #t))))

  (define (meta-data-serialize/uncached meta-data options)
    ;; TODO: serialisation cache
    "Serialize @var{meta-data} into a fresh bytevector

Return the number of bytes written on success,
or @code{#f} on error. (FIXME: raise a condition instead)

@var{meta-data} <meta-data> to serialize
@var{options}: TODO (which compression method to use, is a subset
of the metadata acceptable)"
    (let^ ((! size
	      (vector-fold
	       (lambda (m)
		 (+ (sizeof MetaDataEntry ())
		    (meta-item-data-size m)
		    ;; Is ASCII, therefore
		    ;; string length and
		    ;; byte length
		    ;; coincide.
		    (or (string-length
			 (meta-item-plugin-name m))
			0)
		    (or (string-length
			 (meta-item-mime-type m))
			0)))
	       0
	       (meta-data-items meta-data)))
	   (? (>= size GNUNET_MAX_MALLOC_CHECKED)
	      ;; too large to be processed by upstream
	      #f)
	   (! ent-bv (make-bytevector size))
	   (! mdata-offset
	      (* (sizeof MetaDataEntry ())
		 (meta-data-item-count meta-data)))
	   (_ (let^ ((/o/ meta-item-loop
			  (i 0)
			  (off (- size
				  (* (sizeof MetaDataEntry ())
				     (meta-data-item-count meta-data)))))
		     (? (>= i (meta-data-item-count meta-data))
			(assert (= 0 off))
			'done)
		     (! item (vector-ref (meta-data-items meta-data) i))
		     (! ent-offset (* i (sizeof MetaDataEntry ())))
		     (_ (set%! MetaDataEntry (type) ent-slice (meta-item-type item)))
		     (_ (set%! MetaDataEntry (format) ent-slice (meta-item-format item)))
		     (_ (set%! MetaDataEntry (data-size) ent-slice (meta-item-data-size item)))
		     (! pname (meta-item-plugin-name item))
		     (! mime (meta-item-mime-type item))
		     (! plugin-bv (and pname (string->utf8 pname)))
		     (! mime-bv (and mime (string->utf8 mime)))
		     ;; Add 1 byte for terminating \0.
		     (_ (set%! MetaDataEntry ("plugin-name-length") ent
			       (if plugin-bv
				   (1+ (bytevector-length plugin-bv))
				   0)))
		     (_ (set%! MetaDataEntry ("mime-type-length") ent
			       (if mime-bv
				   (+ 1 (bytevector-length mime-bv))
				   0)))
		     (! off (- off (meta-item-data-size item)))
		     ;; Check for \0 bytes
		     ;; TODO: perform this check elsewhere
		     ;; TODO: check all bytes
		     (? (not (==> (member (meta-item-format item)
					  `(,METAFORMAT_C_STRING
					    ,METAFORMAT_UTF8))
				  (= (bytevector-u8-ref
				      (%meta-item-data item)
				      (bytevector-length
				       (%meta-item-data item)))
				     0)))
			(break))
		     (_ (bytevector-copy!
			 (%meta-item-data item)
			 0
			 ent-bv
			 (+ mdata-offset off)
			 (meta-item-data-size item)))
		     ;; Copy mime type, plugin name
		     ;; and add a terminating \0 byte.
		     (! off (- off (if plugin-bv
				       (+ 1 (bytevector-length plugin-bv))
				       0)))
		     (_ (when plugin-bv
			  (bytevector-copy!
			   plugin-bv 0
			   ent-bv (+ mdata-offset off)
			   (bytevector-length plugin-bv))
			  (bytevector-u8-set!
			   ent-bv
			   (+ mdata-offset off
			      (bytevector-length plugin-bv))
			   0)))
		     (! off (- off
			       (if mime-bv
				   (+ 1 (bytevector-length mime-bv))
				   0)))
		     (_ (when mime-bv
			  (bytevector-copy!
			   mime-bv 0
			   ent-bv (+ mdata-offset off)
			   (bytevector-length mime-bv))
			  (bytevector-u8-set!
			   ent-bv
			   (+ mdata-offset off
			      (bytevector-length mime-bv))
			   0))))
		    (meta-item-loop
		     (+ 1 i)
		     off)))
	   ;; Don't include upstream loop #2, it is a loop
	   ;; for throwing away meta data until
	   ;; everything fits in the buffer ... which doesn't
	   ;; have a use (yet).
	   (! i 0)
	   (? (>= i (meta-data-item-count meta-data))
	      ;; No meta data, only write header
	      (let^ ((! result (make-slice/read-write
				(sizeof MetaDataHeader ())))
		     (_ (set%! MetaDataHeader (version) result 2))
		     (_ (set%! MetaDataHeader (entries) result 0))
		     (_ (set%! MetaDataHeader (size!)   result 0 0)))
		    result-bv))
	   (! left size)
	   (! ent-offset
	      (+ (* i (sizeof MetaDataEntry ()))))
	      ;; TODO in upstream, it is possible to request
	      ;; no compression
	   (! cdata (try-compression ent-bv ent-offset left))
	   (! maybe-compressed-length
	      (if cdata
		  (bytevector-length cdata)
		  left))
	   (! hdr (make-bytevector (+ (sizeof MetaDataHeader ())
				      maybe-compessed-length)))
	   ;; TODO proper #f or condition on overflow
	   (_ (set%! MetaDataHeader (size) hdr left))
	   (_ (set%! MetaDataHeader (entries) hdr
		     (meta-data-item-count meta-data)))
	   (!! (==> cdata (< (bytevector-length cdata) left)))
	   (_ (set%! MetaDataHeader (version hdr)
		     (bitwise-ior 2 (if cdata
					HEADER_COMPRESSED
					0))))
	   (_  (bytevector-copy! (or cdata ent-bv)
				 (if cdata 0 ent-offset)
				 hdr (sizeof MetaDataHeader ())
				 maybe-compressed-length)))
	  hdr)))
